# PRODUCE DESCRIPTIVE EXHIBITS

# SETUP ----

# Restart R (to avoid package clashes)
rm(list = ls())
.rs.restartR()

pacman::p_load(tidyverse, cowplot, viridis, ggplot2, lfe, kableExtra, fixest, sf, scales, lubridate, pdftools, tinytex, marginaleffects, modelsummary, fst, qs, MatchIt, fipio, tinytable)

source("code/globals.R")

data <- qread(file.path(WORKING, "analysisdataset.qs"))

# Apply same restriction we use for regressions
regs <- data %>% filter(sample_main)

# DESCRIPTIVE RESULTS ----

## Descriptive statistics table ----

regs <- regs %>%
  mutate(
    destroyed_num = as.numeric(destroyed),
    wildfire_hazard_pct = wildfire_hazard * 100
  )

vardist <- datasummary(
  destroyed_num + combinedyear + slope + LotSizeAcres + sqfeet1000 + TotalBedrooms + elev + wildfire_hazard_pct ~
    Mean + SD + P5 + Median + P95,
  data = regs,
  fmt = fmt_significant(3),
  output = "data.frame"
)

# Use the data dictionary to rename variables
vardist <- vardist %>%
  mutate(` ` = dict_names[as.character(` `)])


# Panel to count the number of homes in each treatment group
counts_tgroups <- regs %>%
  transmute(
    Jurisdiction = factor(treatmentgroup, levels = c("SRA", "LRA-VHFHSZ", "No-codes")),
    Vintage = case_when(
      period == 0 ~ "Before 1998",
      period == 1 ~ "1998--2007",
      period == 2 ~ "2008--2016"
    )
  ) %>%
  mutate(Vintage = factor(Vintage, levels = c("Before 1998", "1998--2007", "2008--2016"))) %>%
  count(Jurisdiction, Vintage) %>%
  mutate(`Number of homes` = scales::comma(n)) %>%
  select(-n)


# Combine these two tables
names(counts_tgroups) <- names(vardist)[1:3]
summ_stats <- bind_rows(vardist, counts_tgroups)

# Replace all NAs with blanks
summ_stats[is.na(summ_stats)] <- ""

kbl(summ_stats, 
    format = "latex",
    escape = F, 
    booktabs = T,
    align = "lcccccc",
    linesep = "") %>%
  group_rows(sprintf("Panel A. Variables (Homes = %s)", comma(nrow(regs))), 1, 8, bold = F, italic = T) %>%
  group_rows("Panel B. Homes by Jurisdiction and Vintage", 9, 17, bold = F, italic = T, extra_latex_after = "\\hspace{1em}Jurisdiction & Vintage & N \\\\ \n\\midrule \n") %>%
  write(file.path(RES, "descriptive-statistics.tex"))


# Count proportion that are SRA (for R2 Response text)
regs %>%
  summarize(mean(treatmentgroup == "SRA"))

## Appx Exhibit: Number of neighbors by distance ----

# Count all neighbors within each distance
regs <- regs %>%
  mutate(
    neighbors_walls_0 = Dwalls0_post + Dwalls0_pre,
    neighbors_walls_10 = Dwalls10_post + Dwalls10_pre,
    neighbors_walls_20 = Dwalls20_post + Dwalls20_pre,
    neighbors_walls_30 = Dwalls30_post + Dwalls30_pre,
    neighbors_walls_40 = Dwalls40_post + Dwalls40_pre,
    neighbors_walls_50 = Dwalls50_post + Dwalls50_pre,
    neighbors_walls_60 = Dwalls60_post + Dwalls60_pre,
    neighbors_walls_70 = Dwalls70_post + Dwalls70_pre,
    neighbors_walls_80 = Dwalls80_post + Dwalls80_pre,
    neighbors_walls_90 = Dwalls90_post + Dwalls90_pre,
    neighbors_centroids_0 = Dcentroids0_post + Dcentroids0_pre,
    neighbors_centroids_10 = Dcentroids10_post + Dcentroids10_pre,
    neighbors_centroids_20 = Dcentroids20_post + Dcentroids20_pre,
    neighbors_centroids_30 = Dcentroids30_post + Dcentroids30_pre,
    neighbors_centroids_40 = Dcentroids40_post + Dcentroids40_pre,
    neighbors_centroids_50 = Dcentroids50_post + Dcentroids50_pre,
    neighbors_centroids_60 = Dcentroids60_post + Dcentroids60_pre,
    neighbors_centroids_70 = Dcentroids70_post + Dcentroids70_pre,
    neighbors_centroids_80 = Dcentroids80_post + Dcentroids80_pre,
    neighbors_centroids_90 = Dcentroids90_post + Dcentroids90_pre
  )


regs %>% count(neighbors_walls_0)


# Average neighbors within in 0-10m
mean(regs$neighbors_walls_0, na.rm = T) # 0.51

# What proportion are pre-code?
mean(regs$Dwalls0_pre, na.rm = T) / mean(regs$neighbors_walls_0, na.rm = T) # 82%

# Total neighbors in 0-30m
mean(regs$neighbors_walls_0 + regs$neighbors_walls_10 + regs$neighbors_walls_20, na.rm = T) # 1.6

# Total neighbors in 0-30m (centroids)
mean(regs$neighbors_centroids_0 + regs$neighbors_centroids_10 + regs$neighbors_centroids_20, na.rm = T) # 1.7


# Keeping only neighbor variables, transform to long
neighbors_long <- regs %>%
  select(
    ImportParcelID, BuildingOrImprovementNumber, incidentid,
    matches("^D[a-z0-9]*_post"), matches("neighbors_walls"), matches("neighbors_centroids")
  ) %>%
  pivot_longer(
    cols = -c(ImportParcelID, BuildingOrImprovementNumber, incidentid),
    names_to = "neighbor_type", names_prefix = "neighbors_", values_to = "num_neighbors"
  )

# Compute the splits we need: by group (To-code or all homes) and by walls/centroids
neighbors_long <- neighbors_long %>%
  mutate(
    bin_low = str_extract(neighbor_type, "[0-9]+"),
    group = case_when(
      grepl("post", neighbor_type) ~ "Post-code",
      TRUE ~ "All"
    ),
    neighbor_type = case_when(
      grepl("walls", neighbor_type) ~ "Walls",
      grepl("centroids", neighbor_type) ~ "Centroids"
    )
  )

# Relabel bins
neighbors_long <- neighbors_long %>%
  mutate(Distance = sprintf("%g-%gm", as.numeric(bin_low), as.numeric(bin_low) + 10))

# Squish values
neighbors_long <- neighbors_long %>%
  mutate(Neighbors = oob_squish(num_neighbors, range = c(0, 2)))


# Faceted histograms for all neighbors
p_all <- ggplot(
  neighbors_long %>% filter(group == "All", complete.cases(Neighbors)),
  aes(x = Neighbors)
) +
  geom_histogram(binwidth = 1, color = "white", position = "dodge") +
  facet_grid(neighbor_type ~ Distance) +
  scale_x_continuous(name = "Number of neighbors", breaks = c(0, 2), labels = c("0", "2+")) +
  scale_fill_viridis_d(option = "turbo") +
  scale_y_continuous(name = "Homes", labels = comma) +
  theme_cowplot() +
  theme(strip.background = element_blank())

p_all


save_plot(filename = file.path(RES, "neighbor-histograms.pdf"), plot = p_all, base_height = 6, base_asp = 1.5)

# Faceted histogram for to-code neighbors
# Not in paper
p_to_code <- p_all %+% (neighbors_long %>% filter(group == "To-code", complete.cases(Neighbors)))




## Proportion homes destroyed by vintage + histogram -----
hist <- regs %>%
  dplyr::filter(!is.na(combinedyear)) %>%
  mutate(combinedyear = case_when(
    combinedyear < 1935 ~ 1935,
    combinedyear > 2016 ~ 2016,
    TRUE ~ combinedyear
  )) %>%
  group_by(combinedyear) %>%
  summarize(
    sharelost = mean(destroyed),
    numhomes = n()
  )


top <- ggplot(data = hist, aes(x = combinedyear)) +
  geom_point(aes(y = sharelost), color = "red", size = 2.5, alpha = 0.5) +
  geom_line(aes(y = sharelost), color = "red", size = 1, alpha = 0.5) +
  globalplotoptions +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank(),
    plot.margin = margin(0, 0, 16, 0, unit = "pt")
  ) + # bottom margin on the top plot controls spacing between the two gridded plots
  scale_y_continuous(name = "Share Destroyed", breaks = seq(0, 1, 0.2), limits = c(0, 1))

bottom <- ggplot(data = hist, aes(x = combinedyear)) +
  geom_col(aes(y = numhomes), fill = "gray", alpha = 0.9) +
  scale_x_continuous(name = "Effective Year Built", breaks = seq(1935, 2015, 5), labels = c("<1935", seq(1940, 2010, 5), "2015+")) +
  scale_y_continuous(name = "Homes", breaks = c(0, 2000)) +
  globalplotoptions +
  theme(axis.text.x = element_text(angle = 45, size = 14, hjust = 1))

p <- cowplot::plot_grid(top, bottom, align = "v", ncol = 1, rel_heights = c(0.55, 0.45))
save_plot(file.path(RES, "summary_timeseries.pdf"), plot = p)


### Overall stats on Number of Homes and Share Destroyed for paper and slides:
regs %>%
  # filter(!is.na(combinedyear)) %>%
  ungroup() %>%
  summarize(mean(destroyed), n())

### How many fires in the dataset
regs %>%
  # filter(!is.na(combinedyear)) %>%
  group_by(incidentid) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  summarize(n())

### Years spanned
regs %>%
  filter(!is.na(combinedyear)) %>%
  ungroup() %>%
  summarize(min(fireyear), max(fireyear))

## Table of fires and share destroyed ----

regs %>%
  distinct(incidentid, .keep_all = T) %>%
  count(State)

regs %>%
  filter(State == "Colorado") %>%
  count(incidentname, FIPS)

cafires <- length(unique(regs$incidentid[regs$State == "California"]))
excludedfires <- unique(regs$incidentid[regs$regime == "exclude_due_to_local_codes"])

table <- regs %>%
  mutate(year = year(incidentstartdate)) %>%
  group_by(incidentid) %>%
  summarize(
    name = first(incidentname),
    state = first(State),
    year = mean(year),
    numdestroyed = sum(destroyed),
    totalinperimeter = n(),
    sharedestroyed = mean(destroyed)
  ) %>%
  mutate(ca = state == "California") %>%
  arrange(desc(ca), desc(numdestroyed)) %>%
  mutate(name = if_else(name == "Witch2007", "Witch", name))

getabbr <- function(x) `state.abb`[which(`state.name` == x)]

fulltable <- table %>%
  arrange(desc(ca), state, desc(year), desc(numdestroyed)) %>%
  mutate(
    year = as.character(year),
    state = case_when(
      state == "California" ~ "CA",
      state == "Oregon" ~ "OR",
      state == "Washington" ~ "WA",
      state == "Arizona" ~ "AZ",
      state == "Colorado" ~ "CO"
    ),
    name = case_when(
      incidentid %in% excludedfires ~ paste0(name, "$^\\dagger$"),
      !incidentid %in% excludedfires ~ name
    )
  ) %>%
  rename(
    ` ` = name, State = state, Year = year, Destroyed = numdestroyed,
    Exposed = totalinperimeter, `Share Destroyed` = sharedestroyed
  )

ncal <- fulltable %>%
  filter(ca) %>%
  nrow()
nother <- fulltable %>%
  filter(!ca) %>%
  nrow()

kbl(fulltable %>% select(-ca, -incidentid),
  format = "latex",
  linesep = "",
  digits = 2,
  booktabs = T,
  longtable = T,
  escape = F,
  caption = "Full List of Fires and Single Family Home Counts",
  label = "fires_list_all",
  format.args = list(big.mark = ",")
) %>%
  kable_styling(latex_options = c("repeat_header")) %>%
  pack_rows("California", 1, ncal) %>% # https://haozhu233.github.io/kableExtra/awesome_table_in_html.html#Grouped_Columns__Rows
  pack_rows("Other States", ncal + 1, ncal + nother) %>%
  write(file.path(RES, "fires_list_all.tex"))

rm(table, fulltable)

## Map of fires in data -------

f <- data %>%
  group_by(incidentid) %>%
  summarize(n = n())

m <- qread(file.path(WORKING, "cleanedperimeters.qs")) %>%
  rename(incidentid = UNIT_AND_NUMBER) %>%
  mutate(yearperim = as.numeric(yearperim)) %>%
  merge(f)

setdiff(f$incidentid, m$incidentid) # Make sure I've got all the incidents.

states <- read_sf(file.path(INPUT_PUBLIC, "CensusTiger", "tl_2020_us_state", "tl_2020_us_state.shp")) %>%
  filter(NAME %in% c(
    "New Mexico", "Oregon", "California", "Colorado",
    "Wyoming", "Montana", "Washington", "Utah", "Nevada",
    "Arizona", "Idaho"
  )) %>%
  st_transform(st_crs(m))

map <- ggplot(data = m) +
  geom_sf(data = states, color = "black", fill = NA) +
  geom_sf(color = "red", size = 0.5, alpha = 0.7) +
  theme_map()

save_plot(file.path(RES, "summary_map.pdf"), plot = map)

# Timeline of fires in data -----

timeline <- data %>%
  mutate(year = year(incidentstartdate)) %>%
  group_by(incidentid) %>%
  summarize(
    ca = first(State == "California"),
    state = first(State),
    year = mean(year),
    startdate = min(incidentstartdate)
  ) %>%
  mutate(source = case_when(
    ca == TRUE & year >= 2013 ~ "California DINS",
    ca == TRUE & year < 2013 ~ "California Other Sources",
    ca == FALSE ~ "Other States (County Assessors)"
  ))

p <- ggplot(data = timeline) +
  geom_segment(aes(x = startdate, xend = startdate, y = 0, yend = 1, color = source), size = 0.75, alpha = .5) +
  scale_color_discrete() +
  theme_classic() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(size = 14),
    axis.title.x = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12)
  )

save_plot(file.path(RES, "fires_timeline.pdf"), plot = p, base_asp = 2.75)

## How many fires? (For slides)

timeline %>%
  group_by(incidentid) %>%
  summarize(n())

## Table of non-CA counties we checked for building code presence ----

# Filter to all homes outside of CA and count the number of unique counties
x <- data %>%
  filter(regime %in% c("otherstates", "exclude_due_to_local_codes")) %>%
  group_by(regime, incidentid, FIPS, State) %>%
  summarize(homes_burned = sum(destroyed == T), homes_exposed = n()) %>%
  mutate(FIPS = sprintf("%05g", FIPS)) %>%
  mutate(county_name = fipio::fips_county(FIPS))
write_csv(x, file.path(RES, "counties-to-check-for-building-codes.csv"))
